% arXiv preprint 2408.04323 (not a journal article, so the identifier lives in
% the eprint fields rather than in a journal field).
% Author names are stored in "Family, Given" form; acronyms in the title are
% brace-protected so sentence-casing styles do not lowercase them.
@misc{cheng2024scoot,
  author        = {Cheng, Ke and Wang, Zhi and Hu, Wen and Yang, Tiannuo and Li, Jianguo and Zhang, Sheng},
  title         = {Towards {SLO}-Optimized {LLM} Serving via Automatic Inference Engine Tuning},
  year          = {2024},
  eprint        = {2408.04323},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2408.04323},
}